{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of classes: 2\n",
      "Number of images: 493\n",
      "Loading feature extraction model\n",
      "Model directory: models\n",
      "Metagraph file: model-20180402-114759.meta\n",
      "Checkpoint file: model-20180402-114759.ckpt-275\n",
      "INFO:tensorflow:Restoring parameters from models/model-20180402-114759.ckpt-275\n",
      "emb_array.shape:\n",
      "(493, 512)\n",
      "Training classifier\n",
      "['lijun', 'wenjun']\n",
      "Saved classifier model to file \"new_models.pkl\"\n",
      "accuracy: 87.16%\n"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "import cv2\n",
    "import facenet\n",
    "import os\n",
    "from os.path import join \n",
    "import sys\n",
    "import pickle\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn import metrics \n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.neighbors import KNeighborsClassifier \n",
    "\n",
    "%matplotlib inline\n",
    "        \n",
    "def load_data(image_paths, image_size):\n",
    "    nrof_samples = len(image_paths)\n",
    "    images = []\n",
    "    for i in range(nrof_samples):\n",
    "        img = cv2.imread(image_paths[i])\n",
    "        #print(image_paths[i])\n",
    "        #print(img.shape)\n",
    "        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
    "        if gray.ndim == 2:\n",
    "            img = facenet.to_rgb(gray)\n",
    "        images.append(img)\n",
    "    return images\n",
    "\n",
    "# 训练数据存放于train_data目录下 格式如下\n",
    "#-->train_data:\n",
    "#     --->Abby:\n",
    "#            Abby1.jpg\n",
    "#            Abby2.jpg\n",
    "#            ...\n",
    "#     --->John:\n",
    "#           John1.jpg\n",
    "#           John2.jpg\n",
    "#           ...\n",
    "data_dir = './train_data'\n",
    "image_size = 160\n",
    "\n",
    "with tf.Graph().as_default():\n",
    "      \n",
    "    with tf.Session() as sess:\n",
    "            \n",
    "        np.random.seed(seed = 42)\n",
    "        dataset = facenet.get_dataset(data_dir)\n",
    "        \n",
    "        paths, labels = facenet.get_image_paths_and_labels(dataset)\n",
    "        print('Number of classes: %d' % len(dataset))\n",
    "        print('Number of images: %d' % len(paths))\n",
    "           \n",
    "        # 加载模型,模型位于models目录下\n",
    "        print('Loading feature extraction model')\n",
    "        facenet.load_model('models')\n",
    "            \n",
    "        # 获取输入和输出 tensors\n",
    "        images_placeholder = tf.get_default_graph().get_tensor_by_name(\"input:0\")\n",
    "        embeddings = tf.get_default_graph().get_tensor_by_name(\"embeddings:0\")\n",
    "        phase_train_placeholder = tf.get_default_graph().get_tensor_by_name(\"phase_train:0\")\n",
    "        embedding_size = embeddings.get_shape()[1]\n",
    "        \n",
    "        images = load_data(paths, image_size)\n",
    "        #plt.imshow(images[10])\n",
    "        \n",
    "        feed_dict = {images_placeholder:images, phase_train_placeholder:False }\n",
    "        emb_array = sess.run(embeddings, feed_dict=feed_dict)\n",
    "        print('emb_array.shape:')\n",
    "        print(emb_array.shape)\n",
    "        \n",
    "        X_train, X_test, y_train, y_test = train_test_split(emb_array, labels, test_size=.3, random_state=42)\n",
    "                      \n",
    "        classifier_filename_exp = os.path.expanduser('new_models.pkl')\n",
    "\n",
    "        # Train classifier\n",
    "        print('Training classifier')\n",
    "        #model = KNeighborsClassifier() # accuracy: 77.70%\n",
    "        #model = SVC(kernel='linear', probability=True)\n",
    "        #model = SVC(kernel='poly',degree=2,gamma=1,coef0=0,probability=True) # accuracy: 77.03%\n",
    "        model = SVC(kernel='poly',degree=10,gamma=1,coef0=0,probability=True) #accuracy: 87.16%\n",
    "        \n",
    "        model.fit(X_train, y_train)\n",
    "            \n",
    "        # Create a list of class names\n",
    "        class_names = [ cls.name.replace('_', ' ') for cls in dataset]\n",
    "        print(class_names)\n",
    "        \n",
    "        # Saving classifier model\n",
    "        with open(classifier_filename_exp, 'wb') as outfile:\n",
    "            pickle.dump((model, class_names), outfile)\n",
    "        print('Saved classifier model to file \"%s\"' % classifier_filename_exp)\n",
    "        \n",
    "        # 验证\n",
    "        with open(classifier_filename_exp, 'rb') as infile:\n",
    "            (model, class_names) = pickle.load(infile)\n",
    "        predict = model.predict(X_test) \n",
    "        accuracy = metrics.accuracy_score(y_test, predict)  \n",
    "        print ('accuracy: %.2f%%' % (100 * accuracy)  )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(493, 512)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 我们的预训练模型的输出是一个493行，512列的tensor，\n",
    "# 493是我们的数据集数目，512是每个图片最后得出的512个特征值，\n",
    "# 作为我们需要训练的二元分类模型的输入，这里我们使用的是SVC来训练我们的模型\n",
    "emb_array.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
